# -*- coding:utf-8 -*-
"""
BCBTB财经
https://www.bcbtb.com/
"""

from WriteData import writedata
from Tk import genearteMD5
from GetText import get_html_text
from cs import day
import re

# Default HTTP request headers for page downloads; count_process_one passes
# them to get_dates_xw. The User-Agent is a desktop Chrome 68 string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
}


def get_dates_xw(url, headers):
    """Collect the dates shown on one downloaded page.

    The page is fetched through ``get_html_text`` and scanned for every
    ``YYYY-MM-DD`` string that is the sole content of a bare ``<span>``
    element.

    Args:
        url: Address of the page to download.
        headers: Request headers forwarded to ``get_html_text``.

    Returns:
        A list of the matched date strings in page order (possibly empty),
        or ``None`` when ``get_html_text`` returns a falsy value.
    """
    page = get_html_text(url, headers)
    if page:
        return re.findall(r'<span>(\d{4}-\d{2}-\d{2})</span>', page)
    # Nothing was downloaded; callers treat None like an empty result.
    return None




def count_process_one():
    """Tally BCBTB news items by age, walking the listing page by page.

    Pages ``https://www.bcbtb.com/news/1``, ``/2``, ... are fetched in turn.
    Each date found is passed to ``day`` and counted by the result: 0, 1 or
    2 goes into the matching bucket, and any other result marks the item as
    older. The walk ends when a page yields no dates, or right after the
    first page that contained an older item (that page is still counted in
    full).

    Returns:
        A 3-tuple with the number of items whose ``day`` result was 0, 1
        and 2, in that order.
    """
    tallies = [0, 0, 0]
    older_seen = False
    page_no = 1
    while not older_seen:
        page_url = 'https://www.bcbtb.com/news/{}'.format(page_no)
        page_dates = get_dates_xw(page_url, headers)
        if not page_dates:
            # Download failed or the page lists no dates: stop paging.
            break
        print('---- BCBTB财经新闻获取第{}页----'.format(page_no))
        for stamp in page_dates:
            age = day(stamp)
            for bucket in (0, 1, 2):
                if age == bucket:
                    tallies[bucket] += 1
                    break
            else:
                # Not one of the three tracked results; finish this page,
                # then stop requesting further pages.
                older_seen = True
        page_no += 1
    return tuple(tallies)


def main():
    """Count BCBTB news items per day, write the record and return it.

    The three counts from ``count_process_one`` are stored under the keys
    ``today``, ``yesterday`` and ``frontday``, together with the source
    name. The record is keyed by ``genearteMD5`` of the source name
    (presumably an MD5 digest, going by the helper's name) and passed to
    ``writedata``.

    Returns:
        The single-entry dict that was passed to ``writedata``.
    """
    source_name = 'BCBTB财经'
    today, yesterday, frontday = count_process_one()
    record = {
        genearteMD5(source_name): {
            "name": source_name,
            "today": today,
            "yesterday": yesterday,
            "frontday": frontday,
        }
    }
    writedata(record)
    return record


# Script entry point: run the counter once and show the record it produced.
if __name__ == '__main__':
    result = main()
    print(result)
